恭喜Taiwan R User Group 三歲了
恭喜Taiwan R User Group 三歲了
今天有殘酷擂台
當然要講一些 有的沒有的有趣的主題
是什麼?可以吃嗎?
身為骨灰級的玩家眼淚都要掉下來了啊
就是有很多怪物的意思
我就稍微爬了一下怪物的資料
library(rvest)
library(stringr)
library(data.table)
library(googleVis)
library(ca)
library(proxy)
# Download the ranking page and collect the detail-page URL of every monster
# linked from it.
ranking_pages = read_html("http://dqmsl-search.net/ranking/allsbjstatus?hide=&hides=,star1,star2,star3,star4")
# Absolute XPath down to the <a> elements whose href points at a monster page.
urls_xpath = "/html/body/div[@class='mainh']/div[@class='mainc']/div[@class='ccol']/div[@class='mbox'][2]/div[@class='mboxb']/div/div[@class='innnerHideDiv']/div/div/a"
base_url = "http://dqmsl-search.net"
# The href values are prefixed with base_url to obtain full URLs.
# paste0() has no `sep` argument, so none is passed.
monster_urls = ranking_pages %>%
  html_nodes(xpath = urls_xpath) %>%
  html_attr(name = "href") %>%
  unique() %>%
  (function(x) {paste0(base_url, x)})
# Show the first three URLs.
monster_urls[1:3]
## [1] "http://dqmsl-search.net/monster/detail?no=501" ## [2] "http://dqmsl-search.net/monster/detail?no=521" ## [3] "http://dqmsl-search.net/monster/detail?no=543"
ETL is omitted.
# Load the monster table, derive display columns and render it as a paged
# googleVis table.
monsters = fread("data/monsters.csv")
# Every column that is not one of these labels is converted to numeric.
character_cols = c("id", "name", "rank", "system", "type")
numeric_cols = names(monsters)[!names(monsters) %in% character_cols]
monsters[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols = numeric_cols]
# Sum of the six body stats.
monsters[, total := hp + mp + str + def + agi + int]
# The format has exactly two placeholders: the zero-padded id (icon file name)
# and the id (alt text). Only those two values are passed, so sprintf() does
# not warn about an unused argument.
monsters[, icon := sprintf('<img src="img/icon/%s.gif" alt="%s" width="40">',
                           str_pad(id, width = 6, pad = "0"), id)]
# Replace the plain name with a link to the monster's detail page.
monsters[, name := sprintf("<a href='http://dqmsl-search.net/monster/detail?no=%s'>%s</a>",
                           id, name)]
# like / hate counts expressed as a percentage of views.
monsters[, `:=`(like_percent = like / view * 100,
                hate_percent = hate / view * 100)]
# Highest total first.
monsters = monsters[order(total, decreasing = TRUE), ]
mtable = gvisTable(monsters, options = list(page = "enable",
                                            pageSize = 5))
print(mtable, "chart")
# Print a googleVis bar chart of the percentage of rows of `dt` falling into
# each value of the column named by `colname`.
#
# dt:      a data.table.
# colname: name of the grouping column, as a string.
# height, width: chart size passed to the googleVis options.
print_bar = function(dt, colname, height = 400, width = 900) {
  # Row count per group, stored in a column called `percent` ...
  shares = dt[, .(percent = .N), by = colname]
  # ... then rescaled in place so the column sums to 100.
  shares[, percent := percent / sum(percent) * 100]
  chart_options = list(height = height, width = width)
  bar_chart = gvisBarChart(shares, options = chart_options)
  print(bar_chart, "chart")
}
# One percentage bar chart per categorical monster attribute.
for (monster_col in c("rank", "system", "type")) {
  print_bar(monsters, monster_col)
}
# Cross-tabulate monster system (rows) against monster type (columns),
# then print the counts.
system_type = table(monsters$system, monsters$type)
system_type
## ## 万能 回復 攻撃 特殊 補助 防御 魔法 ## 悪魔系 5 3 32 10 17 3 52 ## 物質系 0 4 36 7 19 26 10 ## ドラゴン系 2 2 64 1 4 10 1 ## スライム系 6 11 24 10 5 11 3 ## ???系 5 1 21 12 0 0 14 ## ゾンビ系 3 2 27 1 7 4 6 ## 自然系 7 6 27 5 19 20 7 ## 転生系 0 0 0 35 0 0 0 ## 魔獣系 5 3 70 6 21 10 7
# Row-wise percentages of the system x type table (each system sums to 100),
# rounded to two decimals and converted to long format.
row_percent = round(prop.table(system_type, margin = 1) * 100, 2)
system_type_dt = as.data.table(row_percent)
# The value column is named "count" although it holds the row percentages.
setnames(system_type_dt, c("system", "type", "count"))
# One row per system, one column per type.
system_type_dt = dcast(system_type_dt, system ~ type, value.var = "count")
yvar = setdiff(names(system_type_dt), "system")
stacked_options = list(isStacked = TRUE, height = 300, width = 900)
stacked_chart = gvisBarChart(system_type_dt, xvar = "system", yvar = yvar,
                             options = stacked_options)
print(stacked_chart, "chart")
# Correspondence analysis of the system x type contingency table.
cafit = ca(system_type)
n_systems = nrow(system_type)
n_types = ncol(system_type)
# Both point sets share Dim1 as the x value. Each set gets its own y column
# (`system` / `type`) and tooltip column, and the other set's columns are NA.
system_points = data.table(Dim1 = cafit$rowcoord[, 1],
                           system = cafit$rowcoord[, 2],
                           system.html.tooltip = rownames(system_type),
                           type = rep(NA, n_systems),
                           type.html.tooltip = rep(NA, n_systems))
type_points = data.table(Dim1 = cafit$colcoord[, 1],
                         system = rep(NA, n_types),
                         system.html.tooltip = rep(NA, n_types),
                         type = cafit$colcoord[, 2],
                         type.html.tooltip = colnames(system_type))
ca_dt = rbind(system_points, type_points)
# The same explicit tick marks are used on both axes.
tick_str = "{'ticks': [-5, -4, -3, -2, -1, 0, 1, 2] }"
ca_options = list(width = 500, height = 500,
                  hAxis = tick_str, vAxis = tick_str)
ca_plot = gvisScatterChart(ca_dt, options = ca_options)
The total variance of the data matrix is measured by the inertia, which resembles a chi-square statistic but is calculated on relative observed and expected frequencies.
The cumulative percentage of inertia of the first two dimensions is about 80 %.
# Auto-print the fitted correspondence analysis object.
cafit
## ## Principal inertias (eigenvalues): ## 1 2 3 4 5 6 ## Value 0.405606 0.186753 0.087494 0.044128 0.013726 0.005737 ## Percentage 54.56% 25.12% 11.77% 5.94% 1.85% 0.77% ## ## ## Rows: ## 悪魔系 物質系 ドラゴン系 スライム系 ???系 ゾンビ系 ## Mass 0.167353 0.139918 0.115226 0.096022 0.072702 0.068587 ## ChiDist 0.870226 0.533945 0.757989 0.669843 0.723305 0.376019 ## Inertia 0.126735 0.039890 0.066203 0.043084 0.038036 0.009698 ## Dim. 1 0.005581 0.303270 0.624426 -0.109549 -0.626504 0.492531 ## Dim. 2 -2.005154 0.456370 0.916871 0.717588 -0.951892 0.032618 ## 自然系 転生系 魔獣系 ## Mass 0.124829 0.048011 0.167353 ## ChiDist 0.525387 2.716489 0.430770 ## Inertia 0.034457 0.354288 0.031054 ## Dim. 1 0.338404 -4.218845 0.402011 ## Dim. 2 0.425708 0.824756 0.426592 ## ## ## Columns: ## 万能 回復 攻撃 特殊 補助 防御 魔法 ## Mass 0.045267 0.043896 0.412894 0.119342 0.126200 0.115226 0.137174 ## ChiDist 0.668747 0.912812 0.447466 1.718554 0.541708 0.752207 1.043109 ## Inertia 0.020245 0.036575 0.082672 0.352467 0.037033 0.065197 0.149256 ## Dim. 1 0.159083 0.238895 0.447837 -2.686864 0.445905 0.480385 0.046881 ## Dim. 2 -0.240881 0.613545 0.387561 0.356418 -0.022284 0.986840 -2.401930
# Print `ca_plot` with the "chart" tag.
print(ca_plot, "chart")
# Print a flat contingency table of rank against weight.
print(ftable(rank ~ weight, data = monsters))
## rank A B C D E F S SS ## weight ## 2 0 0 0 71 38 15 0 0 ## 3 0 0 80 0 0 0 0 0 ## 6 0 112 0 0 0 0 0 0 ## 9 171 0 0 0 0 0 0 0 ## 14 0 0 0 0 0 0 61 0 ## 18 0 0 0 0 0 0 110 0 ## 23 0 0 0 0 0 0 9 0 ## 27 0 0 0 0 0 0 0 53 ## 32 0 0 0 0 0 0 0 9
Here we select the SS-rank monsters and measure the distance between them using hp, mp, str, def, agi and int.
# Multidimensional scaling of the SS-rank monsters on their six body stats.
body_cols = c("hp", "mp", "str", "def", "agi", "int")
scaled_body_cols = paste0("scaled_", body_cols)
ss = copy(monsters[rank %in% c("SS"), ])
# Enlarge the icon for use as a tooltip.
ss[, icon := str_replace(icon, 'width=\"40\"', 'width=\"120\"')]
# scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes. Convert it to a plain numeric vector so the new
# columns are ordinary data.table columns.
ss[, (scaled_body_cols) := lapply(.SD, function(x) as.numeric(scale(x))),
   .SDcols = body_cols]
# NOTE(review): `dist` resolves to whichever package is first on the search
# path (proxy is attached at the top of the file); confirm the default method.
ss_dist = dist(ss[, scaled_body_cols, with = FALSE])
# Classical MDS in two dimensions. eig = TRUE also returns the eigenvalues and GOF.
fit = cmdscale(ss_dist, eig = TRUE, k = 2)
# One point per SS monster, with its icon as the HTML tooltip.
plot_dt = data.table(Dim1 = fit$points[, 1],
                     Dim2 = fit$points[, 2],
                     Dim2.html.tooltip = ss$icon)
plot_out = gvisScatterChart(plot_dt, options = list(tooltip = "{isHtml:'true'}",
                                                    width = 500, height = 500,
                                                    legend = '{"position": "none"}'))
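GOF measures the goodness of fit of the multidimensional scaling. `cmdscale` returns two values $(g_1, g_2)$ defined by
$$g_i = \frac{\sum_{j=1}^{k} \lambda_j}{\sum_{j=1}^{n} T_i(\lambda_j)},$$ where $\lambda_j$ are the eigenvalues sorted in decreasing order, $T_1(v) = |v|$ and $T_2(v) = \max(v, 0)$.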
# Show the two goodness-of-fit values returned by cmdscale().
fit$GOF
## [1] 0.667616 0.667616
# Print the MDS scatter chart built above.
print(plot_out, "chart")
# Mean of every body stat per monster type among the SS monsters.
ss_type_mean = ss[, lapply(.SD, mean), by = "type", .SDcols = body_cols]
# Reshape so that each row is a stat (`variable`) and each column a type.
ss_type_long = melt(ss_type_mean, id.vars = "type")
ss_type_mean = dcast(ss_type_long, variable ~ type, value.var = "value")
line_options = list(height = 300, width = 800)
plot_out = gvisLineChart(ss_type_mean, xvar = "variable",
                         options = line_options)
print(plot_out, "chart")
Skills of monsters can be found on this page.
# Load the skills table and attach monster attributes to every skill row.
skills = fread("data/skills.csv")
# Strip everything up to "(消費MP:" and the trailing ")" from the skill text.
skills[, skill_mp := str_replace_all(skill, "^.*\\(消費MP:|\\)$", "")]
# Lookup table keyed by the plain monster name: the <a ...> wrapper is
# stripped from the `name` column to recover it.
monster_lookup = monsters[, .(monster = gsub("<a href=\\'.*\\'>|</a>", "", name),
                              rank, name, system, icon)]
# Left join: every skill row is kept, and warnings from merge() are suppressed.
skills = suppressWarnings(
  merge(skills, monster_lookup, by = "monster", all.x = TRUE, all.y = FALSE)
)
table_options = list(page = "enable", pageSize = 5)
stable = gvisTable(skills, options = table_options)
print(stable, "chart")
# Percentage bar chart of skill types.
print_bar(skills, "skill_type")
# Number of rows per skill, most frequent first, shown as a paged table.
skill_rows = skills[, .(count = .N), by = "skill"]
skill_count = skill_rows[order(-count)]
count_table_options = list(page = "enable", pageSize = 10)
sctable = gvisTable(skill_count, options = count_table_options)
print(sctable, "chart")
# Multidimensional scaling of the SS-rank monsters on the skill types they own.
ss_skills = copy(skills[rank %in% c("SS"), ])
# Monster x skill-type count table; its row names are the monster names.
monster_skill = table(ss_skills$monster, ss_skills$skill_type)
ss_skills_dist = dist(monster_skill, method = "jaccard")
fit = cmdscale(ss_skills_dist, eig = TRUE, k = 2)
# The rows of fit$points follow rownames(monster_skill), so each icon is
# looked up by monster name. Taking icons from another table by position
# would attach them to the wrong points.
point_icons = ss_skills$icon[match(rownames(monster_skill), ss_skills$monster)]
# Enlarge the icon for use as a tooltip.
point_icons = str_replace(point_icons, 'width=\"40\"', 'width=\"120\"')
plot_dt = data.table(Dim1 = fit$points[, 1],
                     Dim2 = fit$points[, 2],
                     Dim2.html.tooltip = point_icons)
plot_out = gvisScatterChart(plot_dt, options = list(tooltip = "{isHtml:'true'}",
                                                    width = 500, height = 500,
                                                    legend = '{"position": "none"}'))
The GOF is around 0.5.
# Show the two goodness-of-fit values returned by cmdscale().
fit$GOF
## [1] 0.4727897 0.4954645
# Print the skill-type MDS scatter chart built above.
print(plot_out, "chart")
# Correspondence analysis of monster system against skill type, SS rank only.
ss_skills = copy(skills[rank %in% c("SS"), ])
monster_skill = table(ss_skills$system, ss_skills$skill_type)
cafit = ca(monster_skill)
n_systems = nrow(monster_skill)
n_skill_types = ncol(monster_skill)
# Both point sets share Dim1 as the x value. Each set gets its own y column
# (`system` / `skilltype`) and tooltip column, and the other set's columns are NA.
system_points = data.table(Dim1 = cafit$rowcoord[, 1],
                           system = cafit$rowcoord[, 2],
                           system.html.tooltip = rownames(monster_skill),
                           skilltype = rep(NA, n_systems),
                           skilltype.html.tooltip = rep(NA, n_systems))
skilltype_points = data.table(Dim1 = cafit$colcoord[, 1],
                              system = rep(NA, n_skill_types),
                              system.html.tooltip = rep(NA, n_skill_types),
                              skilltype = cafit$colcoord[, 2],
                              skilltype.html.tooltip = colnames(monster_skill))
ca_dt = rbind(system_points, skilltype_points)
# The same explicit tick marks are used on both axes.
tick_str = "{'ticks': [-5, -4, -3, -2, -1, 0, 1, 2] }"
ca_options = list(width = 500, height = 500,
                  hAxis = tick_str, vAxis = tick_str)
ca_plot = gvisScatterChart(ca_dt, options = ca_options)
The cumulative percentage of inertia of the first two dimensions is about 56 %.
# Auto-print the fitted correspondence analysis object.
cafit
## ## Principal inertias (eigenvalues): ## 1 2 3 4 5 6 7 ## Value 0.289277 0.219797 0.156037 0.10297 0.075914 0.044696 0.024312 ## Percentage 31.68% 24.07% 17.09% 11.28% 8.31% 4.9% 2.66% ## ## ## Rows: ## 悪魔系 物質系 ドラゴン系 スライム系 ???系 ゾンビ系 ## Mass 0.131148 0.114754 0.098361 0.073770 0.278689 0.081967 ## ChiDist 0.973843 0.947616 1.372804 1.100903 0.552119 0.846174 ## Inertia 0.124376 0.103046 0.185370 0.089409 0.084954 0.058689 ## Dim. 1 -1.095803 0.797867 1.511370 -1.207212 -0.680161 0.110800 ## Dim. 2 -0.851758 0.766243 1.809836 0.355047 0.304241 0.043428 ## 自然系 魔獣系 ## Mass 0.073770 0.147541 ## ChiDist 1.624782 0.700549 ## Inertia 0.194748 0.072409 ## Dim. 1 2.028826 0.158290 ## Dim. 2 -2.392724 -0.625370 ## ## ## Columns: ## 体技回復 体技攻撃 体技特殊 体技状態異常 体技補助アップ ## Mass 0.008197 0.040984 0.024590 0.024590 0.057377 ## ChiDist 1.608799 1.003560 1.160973 1.200830 1.053975 ## Inertia 0.021215 0.041276 0.033144 0.035459 0.063738 ## Dim. 1 -1.264605 -0.091430 -0.744969 -1.522203 0.544751 ## Dim. 2 0.648943 0.052891 -0.012008 -0.172969 1.863531 ## 回復呪文 ブレス攻撃 攻撃呪文 斬撃攻撃 斬撃状態異常 特殊呪文 ## Mass 0.049180 0.098361 0.180328 0.147541 0.295082 0.016393 ## ChiDist 0.922279 0.961557 0.806062 0.551263 0.496474 1.812718 ## Inertia 0.041833 0.090944 0.117165 0.044836 0.072734 0.053868 ## Dim. 1 -1.180605 0.838671 -1.364697 0.009026 0.622830 -1.754572 ## Dim. 2 -0.578102 0.538842 -0.416508 0.371013 0.017035 0.703128 ## ブレス状態異常 ブレス補助 補助アップ呪文 踊り状態異常 ## Mass 0.008197 0.008197 0.024590 0.016393 ## ChiDist 3.543382 3.027650 1.451372 2.020726 ## Inertia 0.102914 0.075137 0.051799 0.066940 ## Dim. 1 3.772144 2.810051 0.676350 2.033224 ## Dim. 2 -5.103665 3.860368 -2.751457 -3.218788
# Print `ca_plot` with the "chart" tag.
print(ca_plot, "chart")
謝謝大家(有機會到這一頁嗎XD)